This file contains an example of tuning multiple models with BayesSearchCV.
import pickle
import time
import helpsk as hlp
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
import plotly.io as pio
# Make 'notebook' the default plotly renderer for figures shown in this session.
pio.renderers.default='notebook'
# Load the training features (X_train) and labels (y_train) from the pickle
# files in the parent directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('../X_train.pkl', 'rb') as handle:
    X_train = pickle.load(handle)
with open('../y_train.pkl', 'rb') as handle:
    y_train = pickle.load(handle)
# Summary statistics (nulls, zeros, mean, spread, quantiles) for the numeric
# columns of X_train, returned as a styled table for display.
hlp.pandas.numeric_summary(X_train, return_style=True)
| # of Non-Nulls | # of Nulls | % Nulls | # of Zeros | % Zeros | Mean | St Dev. | Coef of Var | Skewness | Kurtosis | Min | 10% | 25% | 50% | 75% | 90% | Max | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| duration | 760 | 40 | 5.0% | 0 | 0.0% | 21.0 | 11.7 | 0.6 | 1.0 | 0.6 | 4.0 | 9.0 | 12.0 | 18.0 | 24.0 | 36.0 | 60.0 |
| credit_amount | 800 | 0 | 0.0% | 38 | 5.0% | 3,203.9 | 2,932.3 | 0.9 | 1.9 | 3.9 | 0.0 | 753.9 | 1,300.8 | 2,236.5 | 3,951.5 | 7,394.6 | 18,424.0 |
| installment_commitment | 800 | 0 | 0.0% | 0 | 0.0% | 3.0 | 1.1 | 0.4 | -0.5 | -1.2 | 1.0 | 1.0 | 2.0 | 3.0 | 4.0 | 4.0 | 4.0 |
| residence_since | 800 | 0 | 0.0% | 0 | 0.0% | 2.9 | 1.1 | 0.4 | -0.3 | -1.4 | 1.0 | 1.0 | 2.0 | 3.0 | 4.0 | 4.0 | 4.0 |
| age | 800 | 0 | 0.0% | 0 | 0.0% | 35.6 | 11.4 | 0.3 | 1.0 | 0.7 | 19.0 | 23.0 | 27.0 | 33.0 | 42.0 | 52.0 | 75.0 |
| existing_credits | 800 | 0 | 0.0% | 0 | 0.0% | 1.4 | 0.6 | 0.4 | 1.3 | 1.6 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | 4.0 |
| num_dependents | 800 | 0 | 0.0% | 0 | 0.0% | 1.1 | 0.3 | 0.3 | 2.0 | 2.1 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 |
# Summary (nulls, most frequent value, unique counts) for the non-numeric
# columns of X_train, returned as a styled table for display.
hlp.pandas.non_numeric_summary(X_train, return_style=True)
| # of Non-Nulls | # of Nulls | % Nulls | Most Freq. Value | # of Unique | % Unique | |
|---|---|---|---|---|---|---|
| checking_status | 763 | 37 | 4.6% | no checking | 4 | 0.5% |
| credit_history | 800 | 0 | 0.0% | existing paid | 5 | 0.6% |
| purpose | 800 | 0 | 0.0% | radio/tv | 10 | 1.2% |
| savings_status | 800 | 0 | 0.0% | <100 | 5 | 0.6% |
| employment | 800 | 0 | 0.0% | 1<=X<4 | 5 | 0.6% |
| personal_status | 800 | 0 | 0.0% | male single | 4 | 0.5% |
| other_parties | 800 | 0 | 0.0% | none | 3 | 0.4% |
| property_magnitude | 800 | 0 | 0.0% | car | 4 | 0.5% |
| other_payment_plans | 800 | 0 | 0.0% | none | 3 | 0.4% |
| housing | 800 | 0 | 0.0% | own | 3 | 0.4% |
| job | 800 | 0 | 0.0% | skilled | 4 | 0.5% |
| own_telephone | 800 | 0 | 0.0% | none | 2 | 0.2% |
| foreign_worker | 800 | 0 | 0.0% | yes | 2 | 0.2% |
# Peek at the first ten target labels.
y_train[0:10]
array([1, 1, 0, 1, 0, 1, 0, 1, 1, 0])
# Distinct label values and the number of occurrences of each.
np.unique(y_train, return_counts=True)
(array([0, 1]), array([559, 241]))
# Class proportions: the count of each label divided by the total number of
# labels. The counts are computed once rather than calling np.unique twice.
class_counts = np.unique(y_train, return_counts=True)[1]
class_counts / np.sum(class_counts)
array([0.69875, 0.30125])
# Classifier search-space helper built from the training data; it supplies the
# pipeline, the search spaces, and the parameter-name mappings used further down.
search_space = hlp.sklearn_search.ClassifierSearchSpace(data=X_train)
# pip install scikit-optimize
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.model_selection import RepeatedKFold
# Bayesian hyper-parameter search over the pipeline and search spaces supplied
# by `search_space`, scored by ROC AUC.
bayes_search = BayesSearchCV(
    estimator=search_space.pipeline(),
    search_spaces=search_space.search_spaces(),
    # 5-fold CV repeated twice. Seeded so the fold assignments are reproducible
    # between runs, consistent with the searcher's own random_state below.
    cv=RepeatedKFold(n_splits=5, n_repeats=2, random_state=42),
    scoring='roc_auc',
    # Skip the final refit on the full training set; the code that follows only
    # reads the cross-validation results. (refit=False is also what a search
    # with multiple scorers needs unless a refit metric is named.)
    refit=False,
    return_train_score=False,
    n_jobs=-1,  # run on all available cores
    verbose=1,
    random_state=42,
)
# Run the search and report how long it took. time.perf_counter() is a
# monotonic, high-resolution clock, so the measured interval is not affected by
# system clock adjustments the way time.time() can be.
start_time = time.perf_counter()
bayes_search.fit(X_train, y_train)
elapsed_time = time.perf_counter() - start_time
print(f"Elapsed time to run BayesSearchCV: {elapsed_time:.3f} seconds; {elapsed_time / 60:.1f} minutes")
# Best cross-validated score and the parameter set that produced it.
print(bayes_search.best_score_)
print(bayes_search.best_params_)
# Wrap the finished search in an MLExperimentResults object, write it to a YAML
# file, then load it back from that file so the analysis below works from the
# saved copy.
results = hlp.sklearn_eval.MLExperimentResults.from_sklearn_search_cv(
    searcher=bayes_search,
    higher_score_is_better=True,
    description='BayesSearchCV using ClassifierSearchSpace',
    parameter_name_mappings=search_space.param_name_mappings(),
)
results.to_yaml_file(yaml_file_name='Run 1 - Multi-model - BayesSearchCV.yaml')
results = hlp.sklearn_eval.MLExperimentResults.from_yaml_file(
    yaml_file_name='Run 1 - Multi-model - BayesSearchCV.yaml',
)
# Best score recorded in the experiment results (roc_auc for this search).
results.best_score
0.7785470802977034
# Parameter set (model, hyper-parameters, and preprocessing choices) of the best trial.
results.best_params
{'model': 'LogisticRegression(...)',
'C': 0.1321714160260592,
'imputer': "SimpleImputer(strategy='most_frequent')",
'scaler': 'StandardScaler()',
'encoder': 'OneHotEncoder()'}
# Trials from every model as a styled table with a rank column; the output
# shown is ordered best score first.
results.to_formatted_dataframe(return_style=True, include_rank=True)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | max_features | max_depth | min_samples_split | min_samples_leaf | max_samples | criterion | C | learning_rate | n_estimators | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | imputer | scaler | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.779 | 0.754 | 0.803 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.132 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 2 | 0.777 | 0.756 | 0.798 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.132 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 3 | 0.775 | 0.751 | 0.800 | RandomForestClassifier(...) | 0.010 | 70.000 | 2.000 | 6.000 | 0.923 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | OneHotEncoder() |
| 4 | 0.775 | 0.748 | 0.802 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.021 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 5 | 0.775 | 0.735 | 0.814 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.137 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 6 | 0.774 | 0.748 | 0.801 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.130 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 7 | 0.774 | 0.747 | 0.801 | RandomForestClassifier(...) | 0.010 | 100.000 | 50.000 | 11.000 | 1.000 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 8 | 0.773 | 0.762 | 0.785 | RandomForestClassifier(...) | 0.010 | 100.000 | 21.000 | 10.000 | 1.000 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | CustomOrdinalEncoder() |
| 9 | 0.773 | 0.756 | 0.790 | RandomForestClassifier(...) | 0.010 | 100.000 | 2.000 | 7.000 | 0.500 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | CustomOrdinalEncoder() |
| 10 | 0.773 | 0.752 | 0.793 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.133 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 11 | 0.773 | 0.750 | 0.795 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.024 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | MinMaxScaler() | OneHotEncoder() |
| 12 | 0.773 | 0.746 | 0.799 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.130 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 13 | 0.772 | 0.737 | 0.808 | RandomForestClassifier(...) | 0.010 | 100.000 | 50.000 | 12.000 | 1.000 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 14 | 0.772 | 0.747 | 0.798 | RandomForestClassifier(...) | 0.010 | 100.000 | 50.000 | 1.000 | 1.000 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 15 | 0.772 | 0.747 | 0.797 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.006 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 16 | 0.772 | 0.752 | 0.792 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.023 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | OneHotEncoder() |
| 17 | 0.772 | 0.750 | 0.794 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.135 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 18 | 0.772 | 0.749 | 0.795 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.132 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 19 | 0.772 | 0.749 | 0.794 | ExtraTreesClassifier(...) | 0.010 | 70.000 | 50.000 | 1.000 | 1.000 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | CustomOrdinalEncoder() |
| 20 | 0.772 | 0.740 | 0.803 | RandomForestClassifier(...) | 0.023 | 85.000 | 18.000 | 21.000 | 0.987 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
| 21 | 0.772 | 0.754 | 0.789 | ExtraTreesClassifier(...) | 0.010 | 42.000 | 50.000 | 1.000 | 0.500 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | CustomOrdinalEncoder() |
| 22 | 0.771 | 0.736 | 0.807 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.024 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | OneHotEncoder() |
| 23 | 0.771 | 0.749 | 0.794 | RandomForestClassifier(...) | 0.010 | 100.000 | 50.000 | 13.000 | 1.000 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 24 | 0.771 | 0.755 | 0.788 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.023 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | OneHotEncoder() |
| 25 | 0.771 | 0.742 | 0.799 | ExtraTreesClassifier(...) | 0.239 | 52.000 | 50.000 | 1.000 | 0.500 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | CustomOrdinalEncoder() |
| 26 | 0.770 | 0.740 | 0.800 | RandomForestClassifier(...) | 0.048 | 94.000 | 34.000 | 12.000 | 0.899 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | OneHotEncoder() |
| 27 | 0.770 | 0.740 | 0.799 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.134 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 28 | 0.770 | 0.736 | 0.803 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.023 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | OneHotEncoder() |
| 29 | 0.770 | 0.751 | 0.788 | ExtraTreesClassifier(...) | 0.010 | 47.000 | 50.000 | 1.000 | 0.500 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | CustomOrdinalEncoder() |
| 30 | 0.769 | 0.743 | 0.795 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.134 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 31 | 0.769 | 0.742 | 0.796 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.026 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | OneHotEncoder() |
| 32 | 0.769 | 0.734 | 0.804 | RandomForestClassifier(...) | 0.010 | 58.000 | 50.000 | 1.000 | 0.602 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 33 | 0.769 | 0.745 | 0.793 | ExtraTreesClassifier(...) | 0.010 | 61.000 | 50.000 | 1.000 | 0.500 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | CustomOrdinalEncoder() |
| 34 | 0.769 | 0.742 | 0.796 | ExtraTreesClassifier(...) | 0.010 | 100.000 | 2.000 | 1.000 | 0.710 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | CustomOrdinalEncoder() |
| 35 | 0.769 | 0.739 | 0.798 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.023 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | OneHotEncoder() |
| 36 | 0.769 | 0.740 | 0.797 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.024 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | OneHotEncoder() |
| 37 | 0.769 | 0.739 | 0.798 | ExtraTreesClassifier(...) | 0.118 | 50.000 | 37.000 | 1.000 | 0.947 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
| 38 | 0.769 | 0.745 | 0.792 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.025 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | OneHotEncoder() |
| 39 | 0.768 | 0.753 | 0.784 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.133 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 40 | 0.768 | 0.750 | 0.787 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.024 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | OneHotEncoder() |
| 41 | 0.768 | 0.751 | 0.786 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.040 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | OneHotEncoder() |
| 42 | 0.768 | 0.743 | 0.793 | LogisticRegression(...) | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.133 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 43 | 0.768 | 0.736 | 0.800 | RandomForestClassifier(...) | 0.010 | 100.000 | 50.000 | 1.000 | 1.000 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | CustomOrdinalEncoder() |
| 44 | 0.768 | 0.746 | 0.790 | RandomForestClassifier(...) | 0.010 | 100.000 | 50.000 | 15.000 | 1.000 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 45 | 0.768 | 0.739 | 0.797 | ExtraTreesClassifier(...) | 0.010 | 100.000 | 50.000 | 1.000 | 0.962 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | CustomOrdinalEncoder() |
| 46 | 0.768 | 0.750 | 0.785 | RandomForestClassifier(...) | 0.021 | 8.000 | 47.000 | 10.000 | 0.695 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | CustomOrdinalEncoder() |
| 47 | 0.767 | 0.735 | 0.800 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.025 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | OneHotEncoder() |
| 48 | 0.767 | 0.746 | 0.788 | ExtraTreesClassifier(...) | 0.036 | 78.000 | 42.000 | 1.000 | 0.568 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | CustomOrdinalEncoder() |
| 49 | 0.767 | 0.748 | 0.786 | ExtraTreesClassifier(...) | 0.010 | 89.000 | 41.000 | 1.000 | 0.901 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
| 50 | 0.767 | 0.735 | 0.799 | LinearSVC() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.025 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | MinMaxScaler() | OneHotEncoder() |
# XGBClassifier trials only, not sorted by score.
# NOTE(review): with sort_by_score=False the rows are presumably in the order
# the trials were run — confirm against helpsk.
results.to_formatted_dataframe(query='model == "XGBClassifier(...)"', sort_by_score=False)
| roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | max_depth | learning_rate | n_estimators | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | imputer | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.732 | 0.702 | 0.762 | 2.000 | 0.186 | 261.000 | 18.000 | 0.820 | 0.973 | 0.738 | 0.091 | 1.706 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.741 | 0.722 | 0.761 | 1.000 | 0.449 | 242.000 | 3.000 | 0.787 | 0.918 | 0.691 | 0.030 | 2.709 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.703 | 0.682 | 0.725 | 2.000 | 0.261 | 1,066.000 | 21.000 | 0.692 | 0.780 | 0.801 | 0.002 | 2.561 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.695 | 0.668 | 0.721 | 8.000 | 0.153 | 1,510.000 | 15.000 | 0.873 | 0.543 | 0.701 | 0.853 | 1.133 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.708 | 0.679 | 0.737 | 19.000 | 0.232 | 1,219.000 | 35.000 | 0.897 | 0.651 | 0.696 | 0.001 | 1.762 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.753 | 0.741 | 0.765 | 1.000 | 0.446 | 118.000 | 3.000 | 0.753 | 0.620 | 0.826 | 0.010 | 1.001 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.709 | 0.672 | 0.746 | 20.000 | 0.462 | 559.000 | 3.000 | 0.826 | 0.618 | 0.647 | 0.048 | 1.341 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.673 | 0.644 | 0.702 | 3.000 | 0.445 | 1,962.000 | 37.000 | 0.639 | 0.758 | 0.821 | 0.387 | 3.109 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.733 | 0.710 | 0.755 | 4.000 | 0.224 | 106.000 | 3.000 | 0.920 | 0.838 | 0.824 | 0.000 | 1.895 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.702 | 0.684 | 0.721 | 4.000 | 0.157 | 1,423.000 | 9.000 | 0.985 | 0.658 | 0.671 | 0.001 | 2.761 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.500 | <NA> | <NA> | 4.000 | 0.500 | 157.000 | 69.000 | 0.500 | 0.681 | 0.628 | 0.002 | 3.287 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.760 | 0.737 | 0.782 | 1.000 | 0.500 | 100.000 | 1.000 | 0.818 | 0.500 | 1.000 | 0.000 | 1.463 | SimpleImputer() | OneHotEncoder() |
| 0.500 | <NA> | <NA> | 100.000 | 0.171 | 100.000 | 100.000 | 0.785 | 0.500 | 0.500 | 1.000 | 3.520 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.500 | <NA> | <NA> | 1.000 | 0.500 | 2,000.000 | 100.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.743 | 0.720 | 0.766 | 1.000 | 0.500 | 100.000 | 3.000 | 0.500 | 0.562 | 0.921 | 0.118 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.757 | 0.733 | 0.780 | 1.000 | 0.362 | 100.000 | 1.000 | 0.955 | 0.691 | 0.835 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.762 | 0.728 | 0.796 | 1.000 | 0.147 | 100.000 | 2.000 | 0.975 | 0.500 | 0.937 | 0.000 | 1.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.733 | 0.708 | 0.759 | 100.000 | 0.000 | 100.000 | 23.000 | 1.000 | 1.000 | 0.500 | 0.000 | 1.000 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.703 | 0.678 | 0.728 | 100.000 | 0.445 | 2,000.000 | 1.000 | 0.745 | 0.676 | 0.781 | 0.159 | 1.429 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.764 | 0.739 | 0.788 | 1.000 | 0.466 | 100.000 | 1.000 | 1.000 | 0.500 | 0.967 | 0.001 | 2.485 | SimpleImputer() | OneHotEncoder() |
| 0.722 | 0.699 | 0.745 | 1.000 | 0.026 | 100.000 | 6.000 | 0.891 | 0.802 | 0.531 | 0.099 | 1.045 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.748 | 0.723 | 0.773 | 1.000 | 0.402 | 100.000 | 1.000 | 1.000 | 0.500 | 0.905 | 0.312 | 4.000 | SimpleImputer() | OneHotEncoder() |
| 0.718 | 0.690 | 0.745 | 1.000 | 0.205 | 2,000.000 | 1.000 | 0.759 | 0.812 | 1.000 | 0.000 | 1.275 | SimpleImputer() | OneHotEncoder() |
| 0.744 | 0.725 | 0.762 | 1.000 | 0.500 | 100.000 | 1.000 | 0.569 | 0.768 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.761 | 0.741 | 0.782 | 1.000 | 0.500 | 100.000 | 2.000 | 1.000 | 0.500 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.763 | 0.726 | 0.800 | 1.000 | 0.478 | 100.000 | 1.000 | 1.000 | 0.722 | 1.000 | 0.006 | 1.886 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.728 | 0.706 | 0.750 | 1.000 | 0.008 | 100.000 | 12.000 | 0.500 | 0.684 | 1.000 | 0.000 | 2.085 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.763 | 0.747 | 0.779 | 1.000 | 0.361 | 100.000 | 1.000 | 1.000 | 0.578 | 0.713 | 0.018 | 1.965 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.764 | 0.741 | 0.787 | 1.000 | 0.279 | 100.000 | 25.000 | 1.000 | 0.977 | 0.991 | 1.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.728 | 0.710 | 0.745 | 1.000 | 0.000 | 100.000 | 1.000 | 1.000 | 0.500 | 0.517 | 0.000 | 1.834 | SimpleImputer() | OneHotEncoder() |
| 0.720 | 0.692 | 0.749 | 1.000 | 0.000 | 100.000 | 26.000 | 1.000 | 0.609 | 0.619 | 0.467 | 4.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.749 | 0.725 | 0.774 | 1.000 | 0.500 | 100.000 | 21.000 | 1.000 | 0.956 | 1.000 | 0.027 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.761 | 0.732 | 0.790 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.500 | 0.914 | 0.588 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.756 | 0.718 | 0.795 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.500 | 0.500 | 0.011 | 1.724 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.733 | 0.707 | 0.759 | 1.000 | 0.000 | 100.000 | 24.000 | 1.000 | 0.787 | 0.646 | 0.000 | 1.000 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.762 | 0.731 | 0.793 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 1.000 | 1.000 | 0.001 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.744 | 0.717 | 0.772 | 1.000 | 0.500 | 100.000 | 8.000 | 1.000 | 1.000 | 0.863 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.764 | 0.745 | 0.782 | 1.000 | 0.406 | 100.000 | 1.000 | 1.000 | 0.733 | 0.878 | 0.000 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.727 | 0.696 | 0.757 | 100.000 | 0.366 | 100.000 | 23.000 | 1.000 | 0.869 | 1.000 | 1.000 | 1.000 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.758 | 0.740 | 0.777 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 1.000 | 0.547 | 0.001 | 1.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.760 | 0.739 | 0.781 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 1.000 | 1.000 | 0.000 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.761 | 0.730 | 0.793 | 1.000 | 0.387 | 100.000 | 1.000 | 1.000 | 0.738 | 1.000 | 0.000 | 1.368 | SimpleImputer() | OneHotEncoder() |
| 0.758 | 0.737 | 0.779 | 1.000 | 0.395 | 100.000 | 1.000 | 1.000 | 0.525 | 0.879 | 0.731 | 1.458 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.757 | 0.730 | 0.784 | 1.000 | 0.500 | 100.000 | 5.000 | 1.000 | 0.911 | 0.649 | 0.233 | 4.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.752 | 0.728 | 0.777 | 1.000 | 0.394 | 100.000 | 1.000 | 1.000 | 0.648 | 0.912 | 0.000 | 1.334 | SimpleImputer() | OneHotEncoder() |
| 0.751 | 0.731 | 0.771 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.824 | 0.649 | 0.110 | 3.777 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.721 | 0.707 | 0.734 | 1.000 | 0.000 | 2,000.000 | 2.000 | 1.000 | 0.500 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.751 | 0.727 | 0.775 | 1.000 | 0.335 | 100.000 | 1.000 | 1.000 | 0.961 | 0.608 | 0.001 | 1.000 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.762 | 0.729 | 0.795 | 1.000 | 0.358 | 100.000 | 1.000 | 1.000 | 0.647 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.746 | 0.718 | 0.773 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.684 | 0.828 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
# XGBClassifier trials only, using the default ordering (best score first in
# the output shown).
results.to_formatted_dataframe(query='model == "XGBClassifier(...)"')
| roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | max_depth | learning_rate | n_estimators | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | imputer | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.764 | 0.739 | 0.788 | 1.000 | 0.466 | 100.000 | 1.000 | 1.000 | 0.500 | 0.967 | 0.001 | 2.485 | SimpleImputer() | OneHotEncoder() |
| 0.764 | 0.745 | 0.782 | 1.000 | 0.406 | 100.000 | 1.000 | 1.000 | 0.733 | 0.878 | 0.000 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.764 | 0.741 | 0.787 | 1.000 | 0.279 | 100.000 | 25.000 | 1.000 | 0.977 | 0.991 | 1.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.763 | 0.747 | 0.779 | 1.000 | 0.361 | 100.000 | 1.000 | 1.000 | 0.578 | 0.713 | 0.018 | 1.965 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.763 | 0.726 | 0.800 | 1.000 | 0.478 | 100.000 | 1.000 | 1.000 | 0.722 | 1.000 | 0.006 | 1.886 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.762 | 0.729 | 0.795 | 1.000 | 0.358 | 100.000 | 1.000 | 1.000 | 0.647 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.762 | 0.731 | 0.793 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 1.000 | 1.000 | 0.001 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.762 | 0.728 | 0.796 | 1.000 | 0.147 | 100.000 | 2.000 | 0.975 | 0.500 | 0.937 | 0.000 | 1.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.761 | 0.730 | 0.793 | 1.000 | 0.387 | 100.000 | 1.000 | 1.000 | 0.738 | 1.000 | 0.000 | 1.368 | SimpleImputer() | OneHotEncoder() |
| 0.761 | 0.741 | 0.782 | 1.000 | 0.500 | 100.000 | 2.000 | 1.000 | 0.500 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.761 | 0.732 | 0.790 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.500 | 0.914 | 0.588 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.760 | 0.739 | 0.781 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 1.000 | 1.000 | 0.000 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.760 | 0.737 | 0.782 | 1.000 | 0.500 | 100.000 | 1.000 | 0.818 | 0.500 | 1.000 | 0.000 | 1.463 | SimpleImputer() | OneHotEncoder() |
| 0.758 | 0.740 | 0.777 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 1.000 | 0.547 | 0.001 | 1.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.758 | 0.737 | 0.779 | 1.000 | 0.395 | 100.000 | 1.000 | 1.000 | 0.525 | 0.879 | 0.731 | 1.458 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.757 | 0.730 | 0.784 | 1.000 | 0.500 | 100.000 | 5.000 | 1.000 | 0.911 | 0.649 | 0.233 | 4.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.757 | 0.733 | 0.780 | 1.000 | 0.362 | 100.000 | 1.000 | 0.955 | 0.691 | 0.835 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.756 | 0.718 | 0.795 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.500 | 0.500 | 0.011 | 1.724 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.753 | 0.741 | 0.765 | 1.000 | 0.446 | 118.000 | 3.000 | 0.753 | 0.620 | 0.826 | 0.010 | 1.001 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.752 | 0.728 | 0.777 | 1.000 | 0.394 | 100.000 | 1.000 | 1.000 | 0.648 | 0.912 | 0.000 | 1.334 | SimpleImputer() | OneHotEncoder() |
| 0.751 | 0.731 | 0.771 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.824 | 0.649 | 0.110 | 3.777 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.751 | 0.727 | 0.775 | 1.000 | 0.335 | 100.000 | 1.000 | 1.000 | 0.961 | 0.608 | 0.001 | 1.000 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.749 | 0.725 | 0.774 | 1.000 | 0.500 | 100.000 | 21.000 | 1.000 | 0.956 | 1.000 | 0.027 | 1.000 | SimpleImputer() | OneHotEncoder() |
| 0.748 | 0.723 | 0.773 | 1.000 | 0.402 | 100.000 | 1.000 | 1.000 | 0.500 | 0.905 | 0.312 | 4.000 | SimpleImputer() | OneHotEncoder() |
| 0.746 | 0.718 | 0.773 | 1.000 | 0.500 | 100.000 | 1.000 | 1.000 | 0.684 | 0.828 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.744 | 0.717 | 0.772 | 1.000 | 0.500 | 100.000 | 8.000 | 1.000 | 1.000 | 0.863 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.744 | 0.725 | 0.762 | 1.000 | 0.500 | 100.000 | 1.000 | 0.569 | 0.768 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.743 | 0.720 | 0.766 | 1.000 | 0.500 | 100.000 | 3.000 | 0.500 | 0.562 | 0.921 | 0.118 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.741 | 0.722 | 0.761 | 1.000 | 0.449 | 242.000 | 3.000 | 0.787 | 0.918 | 0.691 | 0.030 | 2.709 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.733 | 0.708 | 0.759 | 100.000 | 0.000 | 100.000 | 23.000 | 1.000 | 1.000 | 0.500 | 0.000 | 1.000 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.733 | 0.710 | 0.755 | 4.000 | 0.224 | 106.000 | 3.000 | 0.920 | 0.838 | 0.824 | 0.000 | 1.895 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.733 | 0.707 | 0.759 | 1.000 | 0.000 | 100.000 | 24.000 | 1.000 | 0.787 | 0.646 | 0.000 | 1.000 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.732 | 0.702 | 0.762 | 2.000 | 0.186 | 261.000 | 18.000 | 0.820 | 0.973 | 0.738 | 0.091 | 1.706 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.728 | 0.706 | 0.750 | 1.000 | 0.008 | 100.000 | 12.000 | 0.500 | 0.684 | 1.000 | 0.000 | 2.085 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.728 | 0.710 | 0.745 | 1.000 | 0.000 | 100.000 | 1.000 | 1.000 | 0.500 | 0.517 | 0.000 | 1.834 | SimpleImputer() | OneHotEncoder() |
| 0.727 | 0.696 | 0.757 | 100.000 | 0.366 | 100.000 | 23.000 | 1.000 | 0.869 | 1.000 | 1.000 | 1.000 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.722 | 0.699 | 0.745 | 1.000 | 0.026 | 100.000 | 6.000 | 0.891 | 0.802 | 0.531 | 0.099 | 1.045 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.721 | 0.707 | 0.734 | 1.000 | 0.000 | 2,000.000 | 2.000 | 1.000 | 0.500 | 1.000 | 0.000 | 1.000 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.720 | 0.692 | 0.749 | 1.000 | 0.000 | 100.000 | 26.000 | 1.000 | 0.609 | 0.619 | 0.467 | 4.000 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.718 | 0.690 | 0.745 | 1.000 | 0.205 | 2,000.000 | 1.000 | 0.759 | 0.812 | 1.000 | 0.000 | 1.275 | SimpleImputer() | OneHotEncoder() |
| 0.709 | 0.672 | 0.746 | 20.000 | 0.462 | 559.000 | 3.000 | 0.826 | 0.618 | 0.647 | 0.048 | 1.341 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
| 0.708 | 0.679 | 0.737 | 19.000 | 0.232 | 1,219.000 | 35.000 | 0.897 | 0.651 | 0.696 | 0.001 | 1.762 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.703 | 0.682 | 0.725 | 2.000 | 0.261 | 1,066.000 | 21.000 | 0.692 | 0.780 | 0.801 | 0.002 | 2.561 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.703 | 0.678 | 0.728 | 100.000 | 0.445 | 2,000.000 | 1.000 | 0.745 | 0.676 | 0.781 | 0.159 | 1.429 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.702 | 0.684 | 0.721 | 4.000 | 0.157 | 1,423.000 | 9.000 | 0.985 | 0.658 | 0.671 | 0.001 | 2.761 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.695 | 0.668 | 0.721 | 8.000 | 0.153 | 1,510.000 | 15.000 | 0.873 | 0.543 | 0.701 | 0.853 | 1.133 | SimpleImputer(strategy='median') | OneHotEncoder() |
| 0.673 | 0.644 | 0.702 | 3.000 | 0.445 | 1,962.000 | 37.000 | 0.639 | 0.758 | 0.821 | 0.387 | 3.109 | SimpleImputer(strategy='median') | CustomOrdinalEncoder() |
| 0.500 | <NA> | <NA> | 4.000 | 0.500 | 157.000 | 69.000 | 0.500 | 0.681 | 0.628 | 0.002 | 3.287 | SimpleImputer() | CustomOrdinalEncoder() |
| 0.500 | <NA> | <NA> | 100.000 | 0.171 | 100.000 | 100.000 | 0.785 | 0.500 | 0.500 | 1.000 | 3.520 | SimpleImputer(strategy='most_frequent') | CustomOrdinalEncoder() |
| 0.500 | <NA> | <NA> | 1.000 | 0.500 | 2,000.000 | 100.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | SimpleImputer(strategy='most_frequent') | OneHotEncoder() |
# Display the formatted results table restricted (via `query`) to the LogisticRegression trials.
# NOTE(review): `num_rows=20` presumably caps the number of rows shown — confirm against helpsk.
results.to_formatted_dataframe(query='model == "LogisticRegression(...)"', num_rows=20)
| roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | C | imputer | scaler | encoder |
|---|---|---|---|---|---|---|
| 0.779 | 0.754 | 0.803 | 0.132 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.777 | 0.756 | 0.798 | 0.132 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.775 | 0.735 | 0.814 | 0.137 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.774 | 0.748 | 0.801 | 0.130 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.773 | 0.752 | 0.793 | 0.133 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.773 | 0.746 | 0.799 | 0.130 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.772 | 0.750 | 0.794 | 0.135 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.772 | 0.749 | 0.795 | 0.132 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.770 | 0.740 | 0.799 | 0.134 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.769 | 0.743 | 0.795 | 0.134 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.768 | 0.753 | 0.784 | 0.133 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.768 | 0.743 | 0.793 | 0.133 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.767 | 0.734 | 0.800 | 0.135 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.767 | 0.742 | 0.792 | 0.237 | SimpleImputer() | MinMaxScaler() | OneHotEncoder() |
| 0.767 | 0.736 | 0.798 | 0.013 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.767 | 0.742 | 0.791 | 0.136 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.766 | 0.747 | 0.785 | 0.142 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.766 | 0.735 | 0.797 | 0.891 | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
| 0.766 | 0.742 | 0.790 | 0.125 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
| 0.765 | 0.728 | 0.802 | 0.122 | SimpleImputer(strategy='most_frequent') | StandardScaler() | OneHotEncoder() |
# Gives the score rank for each trial index (position = order of execution; rank 1 = best score).
# e.g. array([4, 2, 1, 3])
# the 1st iteration (i.e. set of params) was the worst
# the 3rd iteration was the best.
results.trial_rankings
array([187, 172, 188, 84, 154, 152, 202, 181, 145, 139, 168, 102, 71,
190, 192, 194, 74, 163, 166, 76, 103, 210, 37, 49, 199, 158,
21, 78, 33, 193, 165, 83, 25, 183, 57, 72, 140, 185, 34,
196, 138, 143, 29, 48, 159, 225, 80, 19, 45, 105, 216, 171,
207, 234, 198, 131, 104, 206, 201, 219, 170, 56, 217, 135, 155,
208, 150, 73, 125, 245, 218, 246, 55, 82, 121, 63, 100, 215,
212, 92, 60, 27, 5, 30, 2, 67, 77, 79, 59, 52, 81,
10, 39, 12, 6, 130, 17, 42, 1, 18, 66, 129, 111, 176,
223, 197, 122, 180, 88, 46, 9, 43, 149, 189, 167, 3, 156,
14, 148, 51, 141, 26, 86, 108, 32, 8, 85, 118, 146, 116,
95, 44, 93, 137, 98, 120, 133, 106, 23, 61, 178, 7, 13,
175, 127, 160, 20, 75, 220, 173, 244, 238, 64, 240, 228, 241,
195, 239, 247, 191, 96, 31, 242, 209, 110, 236, 226, 41, 243,
101, 15, 53, 65, 151, 38, 69, 16, 11, 54, 153, 22, 50,
119, 47, 62, 28, 126, 107, 128, 36, 40, 24, 147, 87, 58,
68, 35, 97, 4, 70, 205, 186, 231, 235, 230, 157, 229, 237,
203, 233, 248, 124, 248, 248, 184, 142, 113, 200, 232, 89, 221,
174, 227, 182, 115, 99, 211, 94, 91, 213, 224, 169, 117, 144,
204, 112, 179, 90, 214, 132, 123, 114, 134, 136, 161, 162, 222,
164, 109, 177])
# Gives the trial indexes ordered from the best score to the worst score.
# e.g. results.trial_rankings of array([4, 2, 1, 3])
# would return [2, 1, 3, 0] because index 2 (i.e. 3rd iteration) was the best, so it is the first index;
# and index 0 (i.e. first iteration) was the worst, so it is the last index.
results.best_trial_indexes
array([ 98, 84, 115, 198, 82, 94, 141, 125, 110, 91, 177, 93, 142,
117, 170, 176, 96, 99, 47, 146, 26, 180, 138, 191, 32, 121,
81, 185, 42, 83, 161, 124, 28, 38, 196, 189, 22, 174, 92,
190, 167, 97, 111, 131, 48, 109, 183, 43, 23, 181, 119, 89,
171, 178, 72, 61, 34, 194, 88, 80, 139, 184, 75, 152, 172,
100, 85, 195, 175, 199, 12, 35, 67, 16, 147, 19, 86, 27,
87, 46, 90, 73, 31, 3, 126, 122, 193, 108, 219, 237, 228,
79, 132, 227, 130, 160, 197, 134, 225, 76, 169, 11, 20, 56,
49, 137, 187, 123, 248, 164, 102, 235, 216, 241, 224, 129, 232,
127, 182, 135, 74, 106, 240, 211, 68, 186, 144, 188, 101, 95,
55, 239, 136, 242, 63, 243, 133, 40, 9, 36, 120, 215, 41,
233, 8, 128, 192, 118, 112, 66, 173, 5, 179, 4, 64, 116,
205, 25, 44, 145, 244, 245, 17, 247, 30, 18, 114, 10, 231,
60, 51, 1, 149, 221, 143, 103, 249, 140, 236, 107, 7, 223,
33, 214, 37, 201, 0, 2, 113, 13, 159, 14, 29, 15, 156,
39, 105, 54, 24, 217, 58, 6, 208, 234, 200, 57, 52, 65,
163, 21, 226, 78, 229, 238, 77, 50, 62, 70, 59, 148, 220,
246, 104, 230, 45, 166, 222, 154, 206, 204, 202, 218, 209, 53,
203, 165, 207, 151, 157, 153, 155, 162, 168, 150, 69, 71, 158,
210, 212, 213])
# Re-number 'Trial Index' within each model so that every model's trials count up from 1.
# rank(method="first") assigns tied values their rank in order of appearance; the ranks come back
# as floats (see the displayed values).
labeled_dataframe = results.to_labeled_dataframe()
labeled_dataframe['Trial Index'] = labeled_dataframe.groupby("model")["Trial Index"].rank(method="first", ascending=True)
labeled_dataframe['Trial Index'].values
array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13.,
14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39.,
40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 1., 2.,
3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28.,
29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
42., 43., 44., 45., 46., 47., 48., 49., 50., 1., 2., 3., 4.,
5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17.,
18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30.,
31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., 42., 43.,
44., 45., 46., 47., 48., 49., 50., 1., 2., 3., 4., 5., 6.,
7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
33., 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45.,
46., 47., 48., 49., 50., 1., 2., 3., 4., 5., 6., 7., 8.,
9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21.,
22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34.,
35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47.,
48., 49., 50.])
# Arithmetic alternative to the per-model rank: wrap 'Trial Index' into the range 1..50
# (1 -> 1, 50 -> 50, 51 -> 1, ...).
# NOTE(review): this hard-codes 50 trials per model and assumes each model's trials are
# contiguous in the overall trial order — confirm before reusing.
((results.to_labeled_dataframe()['Trial Index'] - 1) % 50) + 1
0 1
1 2
2 3
3 4
4 5
..
245 46
246 47
247 48
248 49
249 50
Name: Trial Index, Length: 250, dtype: int64
def plot_performance_across_trials(size: str = None,
                                   color: str = None,
                                   color_continuous_scale=px.colors.diverging.balance,
                                   facet_col=None,
                                   facet_col_wrap=3,
                                   query=None,
                                   height: float = 600,
                                   width: float = 600 * 1.6):
    """
    Returns a Plotly Figure (scatter-plot with a lowess trendline) of the average CV score (y-axis)
    of each trial, in order of trial execution (x-axis).

    The data is read from the module-level `results` object.

    Args:
        size:
            Name of the column used for the point size. This value is passed to plotly. When it is
            not None, an extra line describing it is appended to the title.
        color:
            Name of the column used for the point color. This value is passed to plotly.
        color_continuous_scale:
            The continuous color scale. This value is passed to plotly.
        facet_col:
            Name of the column to facet on. This value is passed to plotly. When set, 'Trial Index'
            is re-ranked within each facet group before plotting.
        facet_col_wrap:
            The number of facet columns per row. This value is passed to plotly.
        query:
            Passed to `results.to_labeled_dataframe` as `query`.
        height:
            The height of the plot. This value is passed to plotly.
        width:
            The width of the plot. This value is passed to plotly.
    """
    metric_name = results.primary_score_name
    score_column = metric_name + " Mean"

    # The title is a headline followed by one <sup> sub-line per remark, separated by <br>.
    title_lines = [
        "Performance Over Time (Across Trials)",
        "<sup>This graph shows the average CV score across all trials, in order of execution.</sup>",
    ]
    if size is not None:
        title_lines.append(f"<sup>The size of the point corresponds to the value of <b>'{size}'</b>.</sup>")
    title = "<br>".join(title_lines)

    trials = results.to_labeled_dataframe(query=query)
    if facet_col:
        # Re-number the trials inside each facet group so that each panel's x-axis counts from 1.
        index_by_facet = trials.groupby(facet_col)["Trial Index"]
        trials['Trial Index'] = index_by_facet.rank(method="first", ascending=True)

    axis_labels = {score_column: f"Average CV Score<br>({metric_name})"}
    hover_text = "Trial Index: %{x}<br>" + score_column + ": %{y}<br><br>Parameters: %{customdata[0]}"

    fig = px.scatter(
        data_frame=trials,
        x='Trial Index',
        y=score_column,
        size=size,
        color=color,
        color_continuous_scale=color_continuous_scale,
        trendline='lowess',
        facet_col=facet_col,
        facet_col_wrap=facet_col_wrap,
        labels=axis_labels,
        title=title,
        custom_data=['label'],
        height=height,
        width=width,
    )
    # fig.update_xaxes(matches=None)  # left disabled, as in the original
    fig.update_traces(hovertemplate=hover_text)
    return fig
# Try the locally defined plot_performance_across_trials: all trials, one facet per model,
# and only the XGBClassifier trials.
plot_performance_across_trials()
plot_performance_across_trials(facet_col='model')
plot_performance_across_trials(query='model == "XGBClassifier(...)"')
# The same style of plots via the methods on the `results` object.
results.plot_performance_across_trials().show()
results.plot_performance_across_trials(color='model').show()
results.plot_parameter_values_across_trials().show()
def plot_parameter_values_across_trials(self,
                                        query: str = None,
                                        height: float = 600,
                                        width: float = 600 * 1.6):
    """
    Returns a Plotly Figure (scatter-plot per numeric parameter) of the parameter's values (y-axis) in
    order of trial execution (x-axis). Especially useful for e.g. BayesSearchCV.

    Each point is colored by the trial's average CV score; the color scale is reversed when
    `self.higher_score_is_better` is False.

    Args:
        query:
            Passed to `self.to_labeled_dataframe` as `query`,
            e.g. 'model == "XGBClassifier(...)"'. Only numeric parameters that are still columns
            of the resulting dataframe are plotted.
        height:
            The height of the plot. This value is passed to plotly.
        width:
            The width of the plot. This value is passed to plotly.
    """
    color_continuous_scale = px.colors.diverging.RdYlGn
    if not self.higher_score_is_better:
        # `list.reverse()` reverses in place and returns None, which would both drop the scale
        # and corrupt plotly's shared RdYlGn list; take a reversed copy instead.
        color_continuous_scale = color_continuous_scale[::-1]

    score_column = self.primary_score_name + " Mean"
    labeled_df = self.to_labeled_dataframe(query=query)

    # Long format: one row per (trial, numeric parameter) so plotly can facet on the parameter.
    numeric_columns = [x for x in self.numeric_parameters if x in labeled_df.columns]
    labeled_long = pd.melt(labeled_df,
                           id_vars=['Trial Index', score_column, 'label'],
                           value_vars=numeric_columns,
                           var_name='parameter')

    fig = px.scatter(
        data_frame=labeled_long,
        x='Trial Index',
        y='value',
        color=score_column,
        color_continuous_scale=color_continuous_scale,
        facet_col='parameter',
        facet_col_wrap=3,
        trendline='lowess',
        labels={
            'value': 'Parameter Value',
        },
        title="Parameter Values Evaluated Over Time (Across Trials)<br>"
              "<sup>This graph shows the parameter values evaluated across all trials.<br>"
              "The color corresponds to the average CV score associated with that trial/point.</sup>",
        custom_data=['label', score_column],
        height=height,
        width=width,
    )
    fig.update_traces(
        hovertemplate="<br>".join([
            "Trial Index: %{x}",
            "Parameter Value: %{y}",
            score_column + ": %{customdata[1]}",
            "<br>Parameters: %{customdata[0]}",
        ])
    )
    # Parameters live on different scales, so give every facet its own y-axis range and ticks.
    fig.update_yaxes(matches=None, showticklabels=True)
    return fig
# Call the locally defined function, passing `results` explicitly as `self`, limited to the
# XGBClassifier trials.
plot_parameter_values_across_trials(results, query='model == "XGBClassifier(...)"')
# Scatter matrix sized to a golden-ratio aspect (width = height * GOLDEN_RATIO).
results.plot_scatter_matrix(height=1000, width=1000 * hlp.plot.GOLDEN_RATIO).show()
results.plot_performance_numeric_params(height=800)
/Users/shanekercheval/opt/anaconda3/envs/python-examples/lib/python3.9/site-packages/statsmodels/nonparametric/smoothers_lowess.py:227: RuntimeWarning: invalid value encountered in true_divide
# Remaining overview plots provided by the `results` object.
results.plot_parallel_coordinates().show()
results.plot_performance_non_numeric_params()
# Score against a single parameter, with two further parameters mapped to size and color.
results.plot_score_vs_parameter(
parameter='learning_rate',
size='colsample_bytree',
color='scaler'
)
# Two parameters against each other, with point size taken from a numeric parameter ...
results.plot_parameter_vs_parameter(parameter_x='colsample_bytree',
parameter_y='learning_rate',
size='max_depth'
)
# ... and the same plot with `size` set to a non-numeric parameter ('imputer').
results.plot_parameter_vs_parameter(parameter_x='colsample_bytree',
parameter_y='learning_rate',
size='imputer')